Chapter 10 Joint Species Distribution Modelling - output analysis

rm(list=ls()) #clear environment
load("data/squirrels_data.Rdata")
options(contrasts = c('contr.sum','contr.poly'))

# Select desired support threshold
support_threshold=0.9
negsupport_threshold=1-support_threshold

red_samples <- sample_metadata %>%
  filter(species == "Sciurus vulgaris") %>%
  dplyr::select(sample) %>%
  pull()

grey_samples <- sample_metadata %>%
  filter(species=="Sciurus carolinensis") %>%
  dplyr::select(sample) %>% pull()
# Load modelchains 
load("hmsc/fit_model.red_250_10.Rdata")
m.red <- m
cv.red <- cv

load("hmsc/fit_model.grey_250_10.Rdata")
m.grey <- m
cv.grey <- cv

rm(m,cv)

levels <- c("index500", "season", "logseqdepth", "Random: animal", "Random: sampling_site")

# Basal tree
hmsc_tree <- genome_tree %>%
        keep.tip(., tip=m.red$spNames) 

10.1 Model fit

10.1.1 Explanatory power

# Red squirrel
preds <- computePredictedValues(m.red) 
MF <- evaluateModelFit(hM=m.red, predY=preds) 
hist(MF$R2, xlim = c(0,1), main=paste0("Mean = ", round(mean(MF$R2),2)))

# Grey squirrel
preds <- computePredictedValues(m.grey) 
MF <- evaluateModelFit(hM=m.grey, predY=preds) 
hist(MF$R2, xlim = c(0,1), main=paste0("Mean = ", round(mean(MF$R2),2)))

10.1.2 Predictive power

# Compute variance partitioning
varpart.red=computeVariancePartitioning(m.red)

varpart.red$vals %>%
   as.data.frame() %>%
   rownames_to_column(var="variable") %>%
   pivot_longer(!variable, names_to = "genome", values_to = "value") %>%
   mutate(variable=factor(variable, levels=levels)) %>%
   group_by(variable) %>%
   summarise(mean=mean(value)*100,sd=sd(value)*100) %>%
   tt()
tinytable_ukj8pe6prr40yi3tasim
variable mean sd
index500 4.153817 5.899410
season 17.565400 7.312821
logseqdepth 7.047628 5.859817
Random: animal 59.430963 18.634969
Random: sampling_site 11.802193 13.741861
# Compute variance partitioning
varpart.grey=computeVariancePartitioning(m.grey)

varpart.grey$vals %>%
   as.data.frame() %>%
   rownames_to_column(var="variable") %>%
   pivot_longer(!variable, names_to = "genome", values_to = "value") %>%
   mutate(variable=factor(variable, levels=levels)) %>%
   group_by(variable) %>%
   summarise(mean=mean(value)*100,sd=sd(value)*100) %>%
   tt()
tinytable_emsk8ao1jjl36nyepg8y
variable mean sd
index500 3.141512 3.309444
season 4.515606 5.616803
logseqdepth 33.570083 22.785619
Random: animal 46.637773 23.876555
Random: sampling_site 12.135027 14.569624
# Red squirrel
MFCV.red <- evaluateModelFit(hM=m.red, predY=cv.red)
mean(MFCV.red$R2, na.rm = TRUE)
[1] 0.1413092
var_pred_table.red <- tibble(mag=m.red$spNames,
       pred=MFCV.red$R2,
       var_pred=MFCV.red$R2 * varpart.red$vals[1,],
       support=getPostEstimate(hM=m.red, parName="Beta")$support %>% .[2,],
       estimate=getPostEstimate(hM=m.red, parName="Beta")$mean %>% .[2,]) 

genome_fit <- tibble(genome=m.red$spNames, r2 = MFCV.red[[2]])

# genome_counts %>%
#   select(genome, any_of(red_samples)) %>%
#   mutate_if(is.numeric, ~ . / sum(.)) %>%
#   left_join(genome_fit, by="genome") %>%
#   filter(r2>0.1) %>%
#   select(-c(genome,r2)) %>%
#   colSums() %>%
#   hist(main="R2", xlab="Proportion of microbiota")

genome_counts %>%
  select(genome, any_of(red_samples)) %>%
  mutate_if(is.numeric, ~ . / sum(.)) %>%
  left_join(var_pred_table.red, by=join_by("genome"=="mag")) %>%
  filter(var_pred>=0.005) %>%
  select(-c(genome,pred, var_pred, support, estimate)) %>%
  colSums() %>%
  hist(main="Predictive MAGs", xlab="Proportion of microbiota")

# Grey squirrel
MFCV.grey <- evaluateModelFit(hM=m.grey, predY=cv.grey)
mean(MFCV.grey$R2, na.rm = TRUE)
[1] 0.2293484
var_pred_table.grey <- tibble(mag=m.grey$spNames,
       pred=MFCV.grey$R2,
       var_pred=MFCV.grey$R2 * varpart.grey$vals[1,],
       support=getPostEstimate(hM=m.grey, parName="Beta")$support %>% .[2,],
       estimate=getPostEstimate(hM=m.grey, parName="Beta")$mean %>% .[2,]) 

genome_fit <- tibble(genome=m.red$spNames, r2 = MFCV.red[[2]])

genome_counts %>%
  select(genome, any_of(grey_samples)) %>%
  mutate_if(is.numeric, ~ . / sum(.)) %>%
  left_join(var_pred_table.grey, by=join_by("genome"=="mag")) %>%
  filter(var_pred>=0.005) %>%
  select(-c(genome,pred, var_pred, support, estimate)) %>%
  colSums() %>%
  hist(main="Predictive MAGs", xlab="Proportion of microbiota")

10.2 Variance partitioning

10.2.1 Red squirrel

# Varpart table
varpart_table.red <- varpart.red$vals %>%
   as.data.frame() %>%
   rownames_to_column(var="variable") %>%
   pivot_longer(!variable, names_to = "genome", values_to = "value") %>%
   mutate(variable=factor(variable, levels=rev(levels))) %>%
   mutate(genome=factor(genome, levels=rev(hmsc_tree$tip.label)))

# Phylums
phylums <- genome_metadata %>%
    filter(genome %in% hmsc_tree$tip.label) %>%
    arrange(match(genome, hmsc_tree$tip.label)) %>%
    mutate(phylum = factor(phylum, levels = unique(phylum))) %>%
    column_to_rownames(var = "genome") %>%
    select(phylum)

# Basal ggtree
varpart_tree <- hmsc_tree %>%
        force.ultrametric(.,method="extend") %>%
        ggtree(., size = 0.3)
***************************************************************
*                          Note:                              *
*    force.ultrametric does not include a formal method to    *
*    ultrametricize a tree & should only be used to coerce    *
*   a phylogeny that fails is.ultrametric due to rounding --  *
*    not as a substitute for formal rate-smoothing methods.   *
***************************************************************
# Add phylum colors next to the tree tips
varpart_tree <- gheatmap(varpart_tree, phylums, offset=-0.2, width=0.1, colnames=FALSE) +
   scale_fill_manual(values=custom_colors)+
      labs(fill="Phylum")

#Reset fill scale to use a different colour profile in the heatmap
varpart_tree <- varpart_tree + new_scale_fill()

# Add variance stacked barplot
vertical_tree <-  varpart_tree +
        scale_fill_manual(values=c("#122f3d","#83bb90","#cccccc","#ed8a45","#f6de9c","#b2b530","#be3e2b","#12738f")) +
        geom_fruit(
             data=varpart_table.red,
             geom=geom_bar,
             mapping = aes(x=value, y=genome, fill=variable, group=variable),
             pwidth = 2,
             offset = 0.05,
             width= 1,
             orientation="y",
             stat="identity")+
      labs(fill="Variable")

vertical_tree

10.2.2 Grey squirrel

# Varpart table
varpart_table.grey <- varpart.grey$vals %>%
   as.data.frame() %>%
   rownames_to_column(var="variable") %>%
   pivot_longer(!variable, names_to = "genome", values_to = "value") %>%
   mutate(variable=factor(variable, levels=rev(levels))) %>%
   mutate(genome=factor(genome, levels=rev(hmsc_tree$tip.label)))

# Phylums
phylums <- genome_metadata %>%
    filter(genome %in% hmsc_tree$tip.label) %>%
    arrange(match(genome, hmsc_tree$tip.label)) %>%
    mutate(phylum = factor(phylum, levels = unique(phylum))) %>%
    column_to_rownames(var = "genome") %>%
    select(phylum)

# Basal ggtree
varpart_tree <- hmsc_tree %>%
        force.ultrametric(.,method="extend") %>%
        ggtree(., size = 0.3)
***************************************************************
*                          Note:                              *
*    force.ultrametric does not include a formal method to    *
*    ultrametricize a tree & should only be used to coerce    *
*   a phylogeny that fails is.ultrametric due to rounding --  *
*    not as a substitute for formal rate-smoothing methods.   *
***************************************************************
# Add phylum colors next to the tree tips
varpart_tree <- gheatmap(varpart_tree, phylums, offset=-0.2, width=0.1, colnames=FALSE) +
   scale_fill_manual(values=custom_colors)+
      labs(fill="Phylum")

#Reset fill scale to use a different colour profile in the heatmap
varpart_tree <- varpart_tree + new_scale_fill()

# Add variance stacked barplot
vertical_tree <-  varpart_tree +
        scale_fill_manual(values=c("#122f3d","#83bb90","#cccccc","#ed8a45","#f6de9c","#b2b530","#be3e2b","#12738f")) +
        geom_fruit(
             data=varpart_table.grey,
             geom=geom_bar,
             mapping = aes(x=value, y=genome, fill=variable, group=variable),
             pwidth = 2,
             offset = 0.05,
             width= 1,
             orientation="y",
             stat="identity")+
      labs(fill="Variable")

vertical_tree

10.3 Posterior estimates

# Posterior estimates red
post_estimates_red <- getPostEstimate(hM=m.red, parName="Beta")$support %>%
    as.data.frame() %>%
    mutate(variable=m.red$covNames) %>%
    pivot_longer(!variable, names_to = "genome", values_to = "value") %>%
    mutate(trend = case_when(
          value >= support_threshold ~ "Positive",
          value <= negsupport_threshold ~ "Negative",
          TRUE ~ "Neutral")) 

post_table_red <- post_estimates_red %>%
    mutate(genome=factor(genome, levels=rev(hmsc_tree$tip.label))) %>%
    select(-trend) %>%
    mutate(value = case_when(
          value >= support_threshold ~ "Positive",
          value <= negsupport_threshold ~ "Negative",
          TRUE ~ "Neutral")) %>%
    mutate(value=factor(value, levels=c("Positive","Neutral","Negative"))) %>%
    pivot_wider(names_from = variable, values_from = value) %>%
    rename(intercept=2,
            index500=3,
            autumn=4,
            winter=5,
            logseqdepth=6) %>%
   column_to_rownames(var="genome") %>%
  select(-intercept)

# Posterior estimates grey
post_estimates_grey <- getPostEstimate(hM=m.grey, parName="Beta")$support %>%
    as.data.frame() %>%
    mutate(variable=m.grey$covNames) %>%
    pivot_longer(!variable, names_to = "genome", values_to = "value") %>%
    mutate(trend = case_when(
          value >= support_threshold ~ "Positive",
          value <= negsupport_threshold ~ "Negative",
          TRUE ~ "Neutral")) 

post_table_grey <- post_estimates_grey %>%
    mutate(genome=factor(genome, levels=rev(hmsc_tree$tip.label))) %>%
    select(-trend) %>%
    mutate(value = case_when(
          value >= support_threshold ~ "Positive",
          value <= negsupport_threshold ~ "Negative",
          TRUE ~ "Neutral")) %>%
    mutate(value=factor(value, levels=c("Positive","Neutral","Negative"))) %>%
    pivot_wider(names_from = variable, values_from = value) %>%
    rename(intercept=2,
            index500=3,
            autumn=4,
            winter=5,
            logseqdepth=6) %>%
   column_to_rownames(var="genome") %>%
  select(-intercept) 
  
# Basal ggtree
postestimates_tree <- hmsc_tree %>%
        force.ultrametric(.,method="extend") %>%
        ggtree(., size = 0.3)
***************************************************************
*                          Note:                              *
*    force.ultrametric does not include a formal method to    *
*    ultrametricize a tree & should only be used to coerce    *
*   a phylogeny that fails is.ultrametric due to rounding --  *
*    not as a substitute for formal rate-smoothing methods.   *
***************************************************************
#Add phylum colors next to the tree tips
postestimates_tree <- gheatmap(postestimates_tree, phylums, offset=-0.2, width=0.1, colnames=FALSE) +
      scale_fill_manual(values=custom_colors)+
      labs(fill="Phylum")

#Reset fill scale to use a different colour profile in the heatmap
postestimates_tree <- postestimates_tree + new_scale_fill()

# Add posterior significant heatmap for red
postestimates_tree_red <- gheatmap(postestimates_tree, post_table_red, offset=0.15, width=0.5, colnames=TRUE, colnames_position="top",colnames_angle=90, colnames_offset_y=1, hjust=0) 

# Add posterior significant heatmap for grey
postestimates_tree_both <- gheatmap(postestimates_tree_red, post_table_grey, offset=1.5, width=0.5, colnames=TRUE, colnames_position="top",colnames_angle=90, colnames_offset_y=1, hjust=0) +
        scale_fill_manual(values=c("#be3e2b","#f4f4f4","#b2b530"))+
        labs(fill="Trend") 


postestimates_tree_both +
        vexpand(.15, 1) + # expand top 
        vexpand(.05, -1) + # expand bottom
        annotate('text', x=3.25, y=-8, label = "S. vulgaris", color='black', fontface=2) +
        annotate('text', x=4.6, y=-8, label = "S. carolinensis", color='black', fontface=2)  

10.4 Correlations among bacteria

10.4.1 Red squirrel

#Compute the residual correlation matrix
OmegaCor = computeAssociations(m.red)

#Co-occurrence matrix at the animal level
supportLevel = 0.95
toPlot = ((OmegaCor[[1]]$support>supportLevel)
          + (OmegaCor[[1]]$support<(1-supportLevel))>0)*OmegaCor[[1]]$mean

matrix <- toPlot %>% 
      as.data.frame() %>%
      rownames_to_column(var="genome1") %>%
      pivot_longer(!genome1, names_to = "genome2", values_to = "cor") %>%
      mutate(genome1= factor(genome1, levels=rev(hmsc_tree$tip.label))) %>%
      mutate(genome2= factor(genome2, levels=hmsc_tree$tip.label)) %>%
      ggplot(aes(x = genome1, y = genome2, fill = cor)) +
            geom_tile() + 
            scale_fill_gradient2(low = "#be3e2b",
                       mid = "#f4f4f4",
                       high = "#b2b530")+
            theme_void() +
            theme(legend.position = "none") 

corr.legend <- get_legend(matrix, position="right") 
corr.legend <- as_ggplot(corr.legend)

vtree <- hmsc_tree %>%
  force.ultrametric(.,method="extend") %>%
  ggtree(., expand=1.5) +
  hexpand(0.5) 
***************************************************************
*                          Note:                              *
*    force.ultrametric does not include a formal method to    *
*    ultrametricize a tree & should only be used to coerce    *
*   a phylogeny that fails is.ultrametric due to rounding --  *
*    not as a substitute for formal rate-smoothing methods.   *
***************************************************************
#Add phylum colors next to the tree tips
vtree <- gheatmap(vtree, phylums, offset=-0.1, width=0.6, colnames=FALSE) +
      scale_fill_manual(values=custom_colors) +
      theme(legend.position = 'none') + scale_y_reverse()

vtreeD <- hmsc_tree %>%
  force.ultrametric(.,method="extend") %>%
  ggtree(., expand=1.5, layout="dendrogram") 
***************************************************************
*                          Note:                              *
*    force.ultrametric does not include a formal method to    *
*    ultrametricize a tree & should only be used to coerce    *
*   a phylogeny that fails is.ultrametric due to rounding --  *
*    not as a substitute for formal rate-smoothing methods.   *
***************************************************************
#Add phylum colors next to the tree tips
vtreeD <- gheatmap(vtreeD, phylums, offset=-0.1, width=0.6, colnames=FALSE) +
      scale_fill_manual(values=custom_colors) +
      theme(legend.position = 'none') 

# properly align trees to corr matrix with package aplot
matrix %>%  insert_left(vtree, width=.25) %>% insert_top(vtreeD, height=.3) %>% insert_right(corr.legend, width=0.15)

10.4.2 Grey squirrel

#Compute the residual correlation matrix
OmegaCor = computeAssociations(m.grey)

#Co-occurrence matrix at the animal level
supportLevel = 0.95
toPlot = ((OmegaCor[[1]]$support>supportLevel)
          + (OmegaCor[[1]]$support<(1-supportLevel))>0)*OmegaCor[[1]]$mean

matrix <- toPlot %>% 
      as.data.frame() %>%
      rownames_to_column(var="genome1") %>%
      pivot_longer(!genome1, names_to = "genome2", values_to = "cor") %>%
      mutate(genome1= factor(genome1, levels=rev(hmsc_tree$tip.label))) %>%
      mutate(genome2= factor(genome2, levels=hmsc_tree$tip.label)) %>%
      ggplot(aes(x = genome1, y = genome2, fill = cor)) +
            geom_tile() + 
            scale_fill_gradient2(low = "#be3e2b",
                       mid = "#f4f4f4",
                       high = "#b2b530")+
            theme_void() +
            theme(legend.position = "none") 

corr.legend <- get_legend(matrix, position="right") 
corr.legend <- as_ggplot(corr.legend)

vtree <- hmsc_tree %>%
  force.ultrametric(.,method="extend") %>%
  ggtree(., expand=1.5) +
  hexpand(0.5) 
***************************************************************
*                          Note:                              *
*    force.ultrametric does not include a formal method to    *
*    ultrametricize a tree & should only be used to coerce    *
*   a phylogeny that fails is.ultrametric due to rounding --  *
*    not as a substitute for formal rate-smoothing methods.   *
***************************************************************
#Add phylum colors next to the tree tips
vtree <- gheatmap(vtree, phylums, offset=-0.1, width=0.6, colnames=FALSE) +
      scale_fill_manual(values=custom_colors) +
      theme(legend.position = 'none') + scale_y_reverse()

vtreeD <- hmsc_tree %>%
  force.ultrametric(.,method="extend") %>%
  ggtree(., expand=1.5, layout="dendrogram") 
***************************************************************
*                          Note:                              *
*    force.ultrametric does not include a formal method to    *
*    ultrametricize a tree & should only be used to coerce    *
*   a phylogeny that fails is.ultrametric due to rounding --  *
*    not as a substitute for formal rate-smoothing methods.   *
***************************************************************
#Add phylum colors next to the tree tips
vtreeD <- gheatmap(vtreeD, phylums, offset=-0.1, width=0.6, colnames=FALSE) +
      scale_fill_manual(values=custom_colors) +
      theme(legend.position = 'none') 

# properly align trees to corr matrix with package aplot
matrix %>%  insert_left(vtree, width=.25) %>% insert_top(vtreeD, height=.3) %>% insert_right(corr.legend, width=0.15)

10.5 Microbiome compositional predictions

10.5.1 Selection of predictive MAGs

pred_mags.red<- var_pred_table.red%>%
  filter(var_pred>=0.005) %>%
  pull(mag)

pred.ab.red <- genome_counts %>%
  mutate_at(vars(-genome),~./sum(.)) %>% #apply TSS normalisation
  filter(genome %in% m.red$spNames) %>%
   rowwise() %>% #compute for each row (genome)
   mutate(
    mean = mean(c_across(all_of(red_samples)), na.rm = TRUE),  # Get mean genome counts across red samples
    prev = sum(c_across(all_of(red_samples)) > 0, na.rm = TRUE) / sum(!is.na(c_across(all_of(red_samples)))), # Proportion of non-zero values
   subset = ifelse(genome %in% pred_mags.red, 'pred', 'nonpred')) %>%
   dplyr::select(genome, subset, mean, prev) %>%
   left_join(genome_metadata, by=join_by(genome==genome)) %>%
   arrange(subset,-mean)

pred.plot.red <- pred.ab.red %>%
  mutate(phylum = ifelse(subset=='pred',phylum, NA)) %>%
  ggplot(., aes(x=prev, y=mean, color=phylum)) +
  geom_point(size=3) +
  scale_color_manual(values=custom_colors) +
  theme_bw()+
    labs(x="Prevalence", y="Mean abundance")

# pred.ab.red %>%
#   ggplot(., aes(x=mean, y=genome, color=subset)) +
#   geom_point(size=3) +
#   theme_bw()+
#     theme(axis.text.y = element_blank())
# 
# pred.ab.red %>%
#   ggplot(., aes(x=prev, y=genome, color=subset)) +
#   geom_point(size=3) +
#   theme_bw()+
#     theme(axis.text.y = element_blank())



pred_mags.grey<- var_pred_table.grey%>%
  filter(var_pred>=0.005) %>%
  pull(mag)

pred.ab.grey <- genome_counts %>% 
  mutate_at(vars(-genome),~./sum(.)) %>% #apply TSS normalisation
  filter(genome %in% m.grey$spNames) %>%
   rowwise() %>% #compute for each row (genome)
   mutate(
    mean = mean(c_across(all_of(grey_samples)), na.rm = TRUE),  # Get mean genome counts across red samples
    prev = sum(c_across(all_of(grey_samples)) > 0, na.rm = TRUE) / sum(!is.na(c_across(all_of(grey_samples)))), # Proportion of non-zero values
    subset = ifelse(genome %in% pred_mags.grey, 'pred', 'nonpred')) %>%
   dplyr::select(genome, subset, mean, prev) %>%
   left_join(genome_metadata, by=join_by(genome==genome)) %>%
   arrange(subset,-mean)

pred.plot.grey <- pred.ab.grey %>%
  mutate(phylum = ifelse(subset=='pred',phylum, NA)) %>%
  ggplot(., aes(x=prev, y=mean, color=phylum)) +
  geom_point(size=3) +
  scale_color_manual(values=custom_colors) +
  theme_bw()+
    labs(x="Prevalence", y="Mean abundance") +
    theme(y.axis.title =element_blank())

ggarrange(pred.plot.red, pred.plot.grey, legend="right", common.legend = TRUE,
          ncol = 2, nrow=1)
Warning in plot_theme(plot): The `y.axis.title` theme element is not defined in the element hierarchy.

# pred.ab.grey %>%
#   ggplot(., aes(x=mean, y=genome, color=subset)) +
#   geom_point(size=3) +
#   theme_bw()+
#     theme(axis.text.y = element_blank())
# 
# pred.ab.grey %>%
#   ggplot(., aes(x=prev, y=genome, color=subset)) +
#   geom_point(size=3) +
#   theme_bw()+
#     theme(axis.text.y = element_blank())


# count_grey <- genome_counts %>%
#   select(any_of(grey_samples)) %>%
#   tss()
# 
# # Basal ggtree
# count_tree <- hmsc_tree %>%
#         force.ultrametric(.,method="extend") %>%
#         ggtree(., size = 0.3)
#         
# #Add phylum colors next to the tree tips
# count_tree <- gheatmap(count_tree, phylums, offset=-0.2, width=0.1, colnames=FALSE) +
#       scale_fill_manual(values=custom_colors)+
#       labs(fill="Phylum")
# 
# #Reset fill scale to use a different colour profile in the heatmap
# count_tree <- count_tree + new_scale_fill()
# 
# # Add posterior significant heatmap for red
# count_tree_grey <- gheatmap(count_tree, count_grey, offset=0.15, width=0.5, color=NA, colnames=FALSE) +
#   scale_fill_gradient(low = "#f4f4f4", high = "steelblue", na.value="white")
# 
# #Add counts
# vertical_tree <- gheatmap(vertical_tree, genome_counts_log, offset=0.5, width=3.5, color=NA, colnames=FALSE) + #, colnames_angle=90, font.size=2, colnames_position="top", colnames_offset_y = 9
#   vexpand(.08) +
#   coord_cartesian(clip = "off") +
#   scale_fill_gradient(low = "#f4f4f4", high = "steelblue", na.value="white")

10.5.2 Predictions by urbanization

10.5.2.1 Red squirrel

gradient = seq(0,1, by=0.1)
gradientlength = length(gradient)

pred_urban_red <- constructGradient(m.red,
                      focalVariable = "index500",
                      non.focalVariables = list(logseqdepth=list(1),species=list(1), season=list(1)),
                      ngrid=gradientlength) %>%
                      predict(m.red, Gradient = ., expected = TRUE) %>%
                      do.call(rbind,.) %>%
                      as.data.frame() %>%
                      mutate(index500=rep(gradient,1000)) %>%
                      pivot_longer(-c(index500), names_to = "genome", values_to = "value")


post_estimates_red %>%
    filter(variable=="index500", 
           genome %in% pred_mags.red) %>% #keep only predictive mags
    select(genome,trend) %>%
    left_join(pred_urban_red, by=join_by(genome==genome)) %>%
    group_by(genome, trend, index500) %>%
    summarize(value = mean(value, na.rm = TRUE)) %>%
    left_join(genome_metadata, by=join_by(genome == genome)) %>%
    ggplot(aes(x=index500, y=value, group=genome, color=phylum, linetype=trend)) +
        geom_line() +
        scale_linetype_manual(values=c("solid","dashed","solid")) +
        scale_color_manual(values=custom_colors) +
        facet_grid(fct_rev(trend) ~ phylum) +
        labs(y="Genome abundance (log)",x="Urbanization index") +
        theme(legend.position = "none") +
        theme_minimal() +
         theme(legend.position = "none",
               axis.text.x = element_text(angle = 45, hjust = 0.8,),
               axis.line.x = element_line(size = 0.5, linetype = "solid", colour = "black"),
               strip.text.x = element_text(angle = 90, hjust = 0,),
               strip.text.y = element_text(face="bold"),
)

10.5.2.2 Grey squirrel

gradient = seq(0,1, by=0.1)
gradientlength = length(gradient)

pred_urban_grey <- constructGradient(m.grey,
                      focalVariable = "index500",
                      non.focalVariables = list(logseqdepth=list(1),species=list(1), season=list(1)),
                      ngrid=gradientlength) %>%
                      predict(m.grey, Gradient = ., expected = TRUE) %>%
                      do.call(rbind,.) %>%
                      as.data.frame() %>%
                      mutate(index500=rep(gradient,1000)) %>%
                      pivot_longer(-c(index500), names_to = "genome", values_to = "value")

post_estimates_grey %>%
    filter(variable=="index500",
           genome %in% pred_mags.grey) %>% #keep only mags predictive mags
    select(genome,trend) %>%
    left_join(pred_urban_grey, by=join_by(genome==genome)) %>%
    group_by(genome, trend, index500) %>%
    summarize(value = mean(value, na.rm = TRUE)) %>%
    left_join(genome_metadata, by=join_by(genome == genome)) %>%
    ggplot(aes(x=index500, y=value, group=genome, color=phylum, linetype=trend)) +
        geom_line() +
        scale_linetype_manual(values=c("solid","dashed","solid")) +
        scale_color_manual(values=custom_colors) +
        facet_grid(fct_rev(trend) ~ phylum) +
        labs(y="Genome abundance (log)",x="Urbanization index") +
        theme(legend.position = "none") +
        theme_minimal() +
         theme(legend.position = "none",
               axis.text.x = element_text(angle = 45, hjust = 0.8,),
               axis.line.x = element_line(size = 0.5, linetype = "solid", colour = "black"),
               strip.text.x = element_text(angle = 90, hjust = 0,),
               strip.text.y = element_text(face="bold"),
)

10.5.3 Predictions by season

10.5.3.1 Red squirrel

aut_enrichment_red <- post_estimates_red %>%
    filter(variable=="seasonautumn") %>%
    select(genome, trend)
win_enrichment_red <- post_estimates_red %>%
    filter(variable=="seasonwinter") %>%
    select(genome, trend)

# m$covNames
gradient = c("spring-summer","autumn", "winter")
gradientlength = length(gradient)

pred_season_red <- constructGradient(m.red, 
                      focalVariable = "season", 
                      non.focalVariables = 1,
                      ngrid=gradientlength) %>%
            predict(m.red, Gradient = ., expected = TRUE) %>%
            do.call(rbind,.) %>%
            as.data.frame() %>%
            mutate(season=rep(gradient,1000)) %>%
            pivot_longer(!season,names_to = "genome", values_to = "value")
genome_metadata_clean <- genome_metadata %>%  
    mutate(family = coalesce(family, paste('Unclassified', order)),
           genus = coalesce(genus, 
                              if_else(grepl('^Unclassified', family),
                                      family, paste('Unclassified', family))))

sa_red <- pred_season_red %>%
  filter(genome %in% pred_mags.red) %>% #keep only predictive mags
  left_join(aut_enrichment_red,by="genome") %>%
      filter(trend != "Neutral") %>%
      pivot_wider(names_from = season, values_from = value) %>%
      unnest(c(`autumn`, `spring-summer`)) %>%
      mutate(diff_sa = `spring-summer` - `autumn`) %>%
      select(genome, diff_sa) %>%
      left_join(genome_metadata_clean, by="genome") %>%
      mutate(genome= factor(genome, levels=genome_tree$tip.label)) %>%
      ggplot(., aes(y=forcats::fct_reorder(genus,diff_sa), x=diff_sa, fill=phylum, color=phylum)) +
            geom_boxplot(outlier.shape = NA) +
            scale_color_manual(values=custom_colors)+
            scale_fill_manual(values=alpha(custom_colors,0.4))+
            geom_vline(xintercept = 0)+
            annotate('text', x=-10, y=16, label = "Enriched in\nspring-summer", color='black') +
            annotate('text', x=10, y=16, label = "Enriched in\nautumn", color='black') +
            theme_classic()+
            labs(x = "Difference in log-abundance", y = "Genera") +
            xlim(-15,15) +
            theme(axis.title.x= element_blank())

sw_red <-pred_season_red %>%
    filter(genome %in% pred_mags.red) %>% #keep only predictive mags
    left_join(win_enrichment_red,by="genome") %>%
      filter(trend != "Neutral") %>%
      pivot_wider(names_from = season, values_from = value) %>%
      unnest(c(`winter`, `spring-summer`)) %>%
      mutate(diff_sw = `spring-summer` - `winter`) %>%
      select(genome, diff_sw) %>%
      left_join(genome_metadata_clean, by="genome") %>%
      mutate(genome= factor(genome, levels=genome_tree$tip.label)) %>%
      ggplot(., aes(y=forcats::fct_reorder(genus,diff_sw), x=diff_sw, fill=phylum, color=phylum)) +
            geom_boxplot(outlier.shape = NA) +
            scale_color_manual(values=custom_colors)+
            scale_fill_manual(values=alpha(custom_colors,0.4))+
            annotate('text', x=-10, y=15, label = "Enriched in\nspring-summer", color='black') + #NB: reversed because it makes no sense, needs to be checked
            annotate('text', x=10, y=15, label = "Enriched in\nwinter", color='black') +
            geom_vline(xintercept = 0)+
            theme_classic()+
            labs(x = "Difference in log-abundance", y = "Genera") +
            xlim(-15,15) + 
            theme(plot.margin = margin(l = 35)) # Left margin)


ggarrange(sa_red, sw_red, legend="right", common.legend = TRUE,
          #labels = c("A", "B", "C"),
          ncol = 1, nrow = 2)

10.5.3.2 Grey squirrel

aut_enrichment_grey <- post_estimates_grey %>%
    filter(variable=="seasonautumn") %>%
    select(genome, trend)
win_enrichment_grey <- post_estimates_grey %>%
    filter(variable=="seasonwinter") %>%
    select(genome, trend)

# m$covNames
pred_season_grey <- constructGradient(m.grey, 
                      focalVariable = "season", 
                      non.focalVariables = 1,
                      ngrid=gradientlength) %>%
            predict(m.grey, Gradient = ., expected = TRUE) %>%
            do.call(rbind,.) %>%
            as.data.frame() %>%
            mutate(season=rep(gradient,1000)) %>%
            pivot_longer(!season,names_to = "genome", values_to = "value")
sa_grey <- pred_season_grey %>%
  filter(genome %in% pred_mags.grey) %>% #keep only predictive mags
  left_join(aut_enrichment_grey,by="genome") %>%
      filter(trend != "Neutral") %>%
      pivot_wider(names_from = season, values_from = value) %>%
      unnest(c(`autumn`, `spring-summer`)) %>%
      mutate(diff_sa = `spring-summer` - `autumn`) %>%
      select(genome, diff_sa) %>%
      left_join(genome_metadata_clean, by="genome") %>%
      mutate(genome= factor(genome, levels=genome_tree$tip.label)) %>%
      ggplot(., aes(y=forcats::fct_reorder(genus,diff_sa), x=diff_sa, fill=phylum, color=phylum)) +
            geom_boxplot(outlier.shape = NA) +
            scale_color_manual(values=custom_colors)+
            scale_fill_manual(values=alpha(custom_colors,0.4))+
            geom_vline(xintercept = 0)+
            annotate('text', x=-10, y=14, label = "Enriched in\nspring-summer", color='black') + 
            annotate('text', x=10, y=14, label = "Enriched in\nautumn", color='black') +
            theme_classic()+
            labs(x = "Difference in log-abundance", y = "Genera") +
            xlim(-15,15) +
            theme(axis.title.x= element_blank())
            #+ theme(plot.margin = margin(l = 40)) # Left margin)

sw_grey <- pred_season_grey %>%
  filter(genome %in% pred_mags.grey) %>% #keep only predictive mags
  left_join(aut_enrichment_grey,by="genome") %>%
      filter(trend != "Neutral") %>%
      pivot_wider(names_from = season, values_from = value) %>%
      unnest(c(`winter`, `spring-summer`)) %>%
      mutate(diff_sw = `spring-summer` - `winter`) %>%
      select(genome, diff_sw) %>%
      left_join(genome_metadata_clean, by="genome") %>%
      mutate(genome= factor(genome, levels=genome_tree$tip.label)) %>%
      ggplot(., aes(y=forcats::fct_reorder(genus,diff_sw), x=diff_sw, fill=phylum, color=phylum)) +
            geom_boxplot(outlier.shape = NA) +
            scale_color_manual(values=custom_colors) +
            scale_fill_manual(values=alpha(custom_colors,0.4)) +
            geom_vline(xintercept = 0) +
            annotate('text', x=-10, y=14, label = "Enriched in\nspring-summer", color='black') + #NB: reversed because it makes no sense, needs to be checked
            annotate('text', x=10, y=14, label = "Enriched in\nwinter", color='black') +
            theme_classic()+
            labs(x = "Difference in log-abundance", y = "Genera") +
            xlim(-15,15) 
     
ggarrange(sa_grey, sw_grey, legend="right", common.legend = TRUE,
          #labels = c("A", "B", "C"),
          ncol = 1, nrow = 2)

10.6 Microbiome functional predictions

tss <- function(abund){sweep(abund, 2, colSums(abund), FUN="/")} 

genome_counts2 <- genome_counts %>%
  column_to_rownames(var="genome")

#Get list of present MAGs
present_MAGs <- genome_counts2 %>%
  filter(rowSums(.[, -1]) != 0) %>%
  rownames()

#Align distillr annotations with present MAGs and remove all-zero and all-one traits
present_MAGs <- present_MAGs[present_MAGs %in% rownames(genome_gifts)]
genome_gifts_filt <- genome_gifts[present_MAGs,] %>%
  select_if(~!all(. == 0)) %>%  #remove all-zero modules
  select_if(~!all(. == 1)) #remove all-one modules

GIFTs_elements <- to.elements(genome_gifts_filt, GIFT_db)

#Aggregate element-level GIFTs into the function level
GIFTs_functions <- to.functions(GIFTs_elements,GIFT_db)

#Aggregate function-level GIFTs into overall Biosynthesis, Degradation and Structural GIFTs and get overall metabolic capacity indices per MAG (at the domain level)
GIFTs_domains <- to.domains(GIFTs_functions,GIFT_db) %>% as.data.frame() %>%
  mutate(Overall=rowMeans(select(.,Biosynthesis,Structure,Degradation), na.rm=TRUE))

10.6.1 Predictions by urbanisation

10.6.1.1 Functions

10.6.1.1.1 Red squirrel
community_func_urb_red <- pred_urban_red %>%
  filter(genome %in% pred_mags.red) %>% #keep only predictive mags
  group_by(index500, genome) %>%
  mutate(row_id = row_number()) %>%
  pivot_wider(names_from = genome, values_from = value) %>%
  ungroup() %>%
  group_split(row_id) %>%
  as.list() %>%
  lapply(., FUN = function(x){x %>%
    select(-row_id) %>%
    column_to_rownames(var = "index500") %>%
    as.data.frame() %>%
    exp() %>%
    t() %>%
    tss() %>%
    to.community(GIFTs_functions,.,GIFT_db) %>%
    as.data.frame() %>%
    rownames_to_column(var="index500")
   })


calculate_slope <- function(x) {
  lm_fit <- lm(unlist(x) ~ seq_along(unlist(x)))
  coef(lm_fit)[2]
}

function_pred_urb_red <- map_dfc(community_func_urb_red, function(mat) {
      mat %>%
        column_to_rownames(var = "index500") %>%
        t() %>%
        as.data.frame() %>%
        rowwise() %>%
        mutate(slope = calculate_slope(c_across(everything()))) %>%
        select(slope) }) %>%
      t() %>%
      as.data.frame() %>%
      set_names(colnames(community_func_urb_red[[1]])[-1]) %>%
      rownames_to_column(var="iteration") %>%
      pivot_longer(!iteration, names_to="trait",values_to="value") %>%
      group_by(trait) %>%
      summarise(mean=mean(value),
        p1 = quantile(value, probs = 0.1),
        p9 = quantile(value, probs = 0.9),
        positive_support = sum(value > 0)/1000,
        negative_support = sum(value < 0)/1000) %>%
      arrange(-positive_support)
# Positively associated with urbanisation
function_pred_urb_red %>%
  filter(mean >0) %>%
  arrange(-positive_support) %>%
  filter(positive_support>=0.9) %>%
  paged_table()
#Negatively associated with urbanisation
function_pred_urb_red %>%
  filter(mean <0) %>%
  arrange(-negative_support) %>%
  filter(negative_support>=0.9) %>%
  paged_table()
positive_red <- function_pred_urb_red%>%
  filter(mean >0) %>%
  arrange(mean) %>%
  filter(positive_support>=0.9) %>%
  select(-negative_support) %>%
  rename(support=positive_support)
negative_red <- function_pred_urb_red %>%
  filter(mean <0) %>%
  arrange(mean) %>%
  filter(negative_support>=0.9) %>%
  select(-positive_support) %>%
  rename(support=negative_support)

all_functions_red <- #bind_rows(positive_red,negative_red)  %>%
  function_pred_urb_red %>%
  left_join(GIFT_db,by=join_by(trait==Code_function)) %>%
  mutate(trait=factor(trait)) %>%
  mutate(function_legend=str_c(trait," - ",Function)) %>%
  select(trait,mean,p1,p9,function_legend) %>% 
  unique()

gift_colors <- read_tsv("data/gift_colors.tsv") 

red_fun_urb_plot <- all_functions_red %>%
  ggplot(aes(x=mean, y=function_legend, xmin=p1, xmax=p9, color=function_legend)) +
      geom_point() +
      geom_errorbar() +
      xlim(c(-0.12,0.12)) +
      geom_vline(xintercept=0) +
      scale_color_manual(values = gift_colors$Color) +
      theme_minimal() +
      labs(x="Regression coefficient",y="Functional trait", title= "S. vulgaris") +
      #guides(col = guide_legend(ncol = 1))
      theme(legend.position = "none",
             plot.title=element_text( hjust=0.5, vjust=0.6, face='bold', size=12))
10.6.1.1.2 Grey squirrel
community_func_urb_grey  <- pred_urban_grey %>%
  filter(genome %in% pred_mags.grey) %>% #keep only predictive mags
  group_by(index500, genome) %>%
  mutate(row_id = row_number()) %>%
  pivot_wider(names_from = genome, values_from = value) %>%
  ungroup() %>%
  group_split(row_id) %>%
  as.list() %>%
  lapply(., FUN = function(x){x %>%
    select(-row_id) %>%
    column_to_rownames(var = "index500") %>%
    as.data.frame() %>%
    exp() %>%
    t() %>%
    tss() %>%
    to.community(GIFTs_functions,.,GIFT_db) %>%
    as.data.frame() %>%
    rownames_to_column(var="index500")
   })


calculate_slope <- function(x) {
  lm_fit <- lm(unlist(x) ~ seq_along(unlist(x)))
  coef(lm_fit)[2]
}

function_pred_urb_grey <- map_dfc(community_func_urb_grey, function(mat) {
      mat %>%
        column_to_rownames(var = "index500") %>%
        t() %>%
        as.data.frame() %>%
        rowwise() %>%
        mutate(slope = calculate_slope(c_across(everything()))) %>%
        select(slope) }) %>%
      t() %>%
      as.data.frame() %>%
      set_names(colnames(community_func_urb_grey[[1]])[-1]) %>%
      rownames_to_column(var="iteration") %>%
      pivot_longer(!iteration, names_to="trait",values_to="value") %>%
      group_by(trait) %>%
      summarise(mean=mean(value),
        p1 = quantile(value, probs = 0.1),
        p9 = quantile(value, probs = 0.9),
        positive_support = sum(value > 0)/1000,
        negative_support = sum(value < 0)/1000) %>%
      arrange(-positive_support)
# Positively associated with urbanisation
function_pred_urb_grey %>%
  filter(mean >0) %>%
  arrange(-positive_support) %>%
  filter(positive_support>=0.9) %>%
  paged_table()
#Negatively associated with urbanisation
function_pred_urb_grey %>%
  filter(mean <0) %>%
  arrange(-negative_support) %>%
  filter(negative_support>=0.9) %>%
  paged_table()
positive_grey <- function_pred_urb_grey %>%
  filter(mean >0) %>%
  arrange(mean) %>%
  filter(positive_support>=0.9) %>%
  select(-negative_support) %>%
  rename(support=positive_support)
negative_grey <- function_pred_urb_grey %>%
  filter(mean <0) %>%
  arrange(mean) %>%
  filter(negative_support>=0.9) %>%
  select(-positive_support) %>%
  rename(support=negative_support)

all_functions_grey <- #bind_rows(positive_grey,negative_grey) %>%
  function_pred_urb_grey %>%
  left_join(GIFT_db,by=join_by(trait==Code_function)) %>%
  mutate(trait=factor(trait)) %>%
  mutate(function_legend=str_c(trait," - ",Function)) %>%
  select(trait,mean,p1,p9,function_legend) %>% 
  unique()

grey_fun_urb_plot <- all_functions_grey %>%
  ggplot(aes(x=mean, y=function_legend, xmin=p1, xmax=p9, color=function_legend)) +
      geom_point() +
      geom_errorbar() +
      xlim(-0.12,0.12) +
      geom_vline(xintercept=0) + 
      scale_color_manual(values = gift_colors$Color) +
      theme_minimal() +
      labs(x="Regression coefficient",y="Functional trait", title="S. carolinensis") +
      #guides(col = guide_legend(ncol = 1))
      theme(legend.position = "none",
            axis.title.y = element_blank(),
            axis.text.y = element_blank(),
            plot.title=element_text( hjust=0.5, vjust=0.6, face='bold', size=12),
            plot.margin = margin(l = 40))
      

      # plot.margin = margin(t = 40)) 
      # annotate('text', x=0, y=21, label = "S. carolinensis", color='black', fontface=2)
10.6.1.1.3 Comparison between species
ggarrange(red_fun_urb_plot, grey_fun_urb_plot,
          ncol=2, nrow=1)

10.6.1.2 Elements

10.6.1.2.1 Red squirrel
community_elem_urb_red <-  pred_urban_red %>%
  filter(genome %in% pred_mags.red) %>% #keep only predictive mags
  group_by(index500, genome) %>%
  mutate(row_id = row_number()) %>%
  pivot_wider(names_from = genome, values_from = value) %>%
  ungroup() %>%
  group_split(row_id) %>%
  as.list() %>%
  lapply(., FUN = function(x){x %>%
    select(-row_id) %>%
    column_to_rownames(var = "index500") %>%
    as.data.frame() %>%
    exp() %>%
    t() %>%
    tss() %>%
    to.community(GIFTs_elements,.,GIFT_db) %>%
    as.data.frame() %>%
    rownames_to_column(var="index500")
   })


calculate_slope <- function(x) {
  lm_fit <- lm(unlist(x) ~ seq_along(unlist(x)))
  coef(lm_fit)[2]
}

element_pred_urb_red <- map_dfc(community_elem_urb_red, function(mat) {
      mat %>%
        column_to_rownames(var = "index500") %>%
        t() %>%
        as.data.frame() %>%
        rowwise() %>%
        mutate(slope = calculate_slope(c_across(everything()))) %>%
        select(slope) }) %>%
      t() %>%
      as.data.frame() %>%
      set_names(colnames(community_elem_urb_red[[1]])[-1]) %>%
      rownames_to_column(var="iteration") %>%
      pivot_longer(!iteration, names_to="trait",values_to="value") %>%
      group_by(trait) %>%
      summarise(mean=mean(value),
        p1 = quantile(value, probs = 0.1),
        p9 = quantile(value, probs = 0.9),
        positive_support = sum(value > 0)/1000,
        negative_support = sum(value < 0)/1000) %>%
      arrange(-positive_support)
# Positively associated with urbanisation
element_pred_urb_red %>%
  filter(mean >0) %>%
  arrange(-positive_support) %>%
  filter(positive_support>=0.9) %>%
  paged_table()
#Negatively associated with urbanisation
element_pred_urb_red %>%
  filter(mean <0) %>%
  arrange(-negative_support) %>%
  filter(negative_support>=0.9) %>%
  paged_table()
positive_red <- element_pred_urb_red%>%
  filter(mean >0) %>%
  arrange(mean) %>%
  filter(positive_support>=0.9) %>%
  select(-negative_support) %>%
  rename(support=positive_support)
negative_red <- element_pred_urb_red %>%
  filter(mean <0) %>%
  arrange(mean) %>%
  filter(negative_support>=0.9) %>%
  select(-positive_support) %>%
  rename(support=negative_support)

all_elements_red <- bind_rows(positive_red,negative_red) %>%
  left_join(GIFT_db,by=join_by(trait==Code_element)) %>%
  mutate(trait=factor(trait,levels=c(rev(positive_red$trait),rev(negative_red$trait)))) %>%
  mutate(Code_function=factor(Code_function)) %>%
  mutate(element_legend=str_c(trait," - ",Element)) %>%
  mutate(function_legend=str_c(Code_function," - ",Function)) %>%
  select(trait,mean,p1,p9,element_legend,function_legend) %>% 
  unique()

red_elem_urb_plot <- all_elements_red %>%
  ggplot(aes(x=mean, y=fct_reorder(element_legend, mean), xmin=p1, xmax=p9, color=function_legend)) +
      geom_point() +
      geom_errorbar() +
      #xlim(c(-0.050,0.050)) +
      geom_vline(xintercept=0) +
      scale_color_manual(values = gift_colors$Color) +
      theme_minimal() +
      labs(x="Regression coefficient",y="Functional trait", title="S. vulgaris", color="Function") +
      #guides(col = guide_legend(ncol = 1))
      theme(legend.position = "right",
            plot.title=element_text( hjust=0.45, vjust=0.6, face='bold', size=12))
10.6.1.2.2 Grey squirrel
community_elem_urb_grey <- pred_urban_grey  %>%
  filter(genome %in% pred_mags.grey) %>% #keep only predictive mags
  group_by(index500, genome) %>%
  mutate(row_id = row_number()) %>%
  pivot_wider(names_from = genome, values_from = value) %>%
  ungroup() %>%
  group_split(row_id) %>%
  as.list() %>%
  lapply(., FUN = function(x){x %>%
    select(-row_id) %>%
    column_to_rownames(var = "index500") %>%
    as.data.frame() %>%
    exp() %>%
    t() %>%
    tss() %>%
    to.community(GIFTs_elements,.,GIFT_db) %>%
    as.data.frame() %>%
    rownames_to_column(var="index500")
   })


calculate_slope <- function(x) {
  lm_fit <- lm(unlist(x) ~ seq_along(unlist(x)))
  coef(lm_fit)[2]
}

element_pred_urb_grey <- map_dfc(community_elem_urb_grey, function(mat) {
      mat %>%
        column_to_rownames(var = "index500") %>%
        t() %>%
        as.data.frame() %>%
        rowwise() %>%
        mutate(slope = calculate_slope(c_across(everything()))) %>%
        select(slope) }) %>%
      t() %>%
      as.data.frame() %>%
      set_names(colnames(community_elem_urb_grey[[1]])[-1]) %>%
      rownames_to_column(var="iteration") %>%
      pivot_longer(!iteration, names_to="trait",values_to="value") %>%
      group_by(trait) %>%
      summarise(mean=mean(value),
        p1 = quantile(value, probs = 0.1),
        p9 = quantile(value, probs = 0.9),
        positive_support = sum(value > 0)/1000,
        negative_support = sum(value < 0)/1000) %>%
      arrange(-positive_support)
# Positively associated with urbanisation
element_pred_urb_grey %>%
  filter(mean >0) %>%
  arrange(-positive_support) %>%
  filter(positive_support>=0.9) %>%
  paged_table()
#Negatively associated with urbanisation
element_pred_urb_grey %>%
  filter(mean <0) %>%
  arrange(-negative_support) %>%
  filter(negative_support>=0.9) %>%
  paged_table()
positive_grey <- element_pred_urb_grey %>%
  filter(mean >0) %>%
  arrange(mean) %>%
  filter(positive_support>=0.9) %>%
  select(-negative_support) %>%
  rename(support=positive_support)
negative_grey <- element_pred_urb_grey %>%
  filter(mean <0) %>%
  arrange(mean) %>%
  filter(negative_support>=0.9) %>%
  select(-positive_support) %>%
  rename(support=negative_support)


all_elements_grey <- bind_rows(positive_grey,negative_grey) %>%
  left_join(GIFT_db,by=join_by(trait==Code_element)) %>%
  mutate(trait=factor(trait,levels=c(rev(positive_grey$trait),rev(negative_grey$trait)))) %>%
  mutate(Code_function=factor(Code_function)) %>%
  mutate(element_legend=str_c(trait," - ",Element)) %>%
  mutate(function_legend=str_c(Code_function," - ",Function)) %>%
  select(trait,mean,p1,p9,element_legend,function_legend) %>% 
  unique()

grey_elem_urb_plot <- all_elements_grey %>%
  ggplot(aes(x=mean, y=fct_reorder(element_legend, mean), xmin=p1, xmax=p9, color=function_legend)) +
      geom_point() +
      geom_errorbar() +
      #xlim(c(-0.050,0.050)) +
      geom_vline(xintercept=0) +
      scale_color_manual(values = gift_colors$Color) +
      theme_minimal() +
      labs(x="Regression coefficient",y="Functional trait", title="S. carolinensis", color="Function") +
      #guides(col = guide_legend(ncol = 1))
      theme(legend.position = "right",
            axis.title.y = element_blank(),
            plot.title=element_text( hjust=0.5, vjust=0.6, face='bold', size=12))
10.6.1.2.3 Comparison between species
ggarrange(red_elem_urb_plot, grey_elem_urb_plot, legend="bottom", common.legend=TRUE, 
          ncol=2, nrow=1)

10.6.2 Predictions by season

10.6.2.1 Functions

10.6.2.1.1 Red squirrel
community_func_seas_red <- pred_season_red  %>%
  filter(genome %in% pred_mags.red) %>% #keep only predictive mags
  group_by(season, genome) %>%
  mutate(row_id = row_number()) %>%
  pivot_wider(names_from = genome, values_from = value) %>%
  ungroup() %>%
  group_split(row_id) %>%
  as.list() %>%
  lapply(., FUN = function(x){x %>%
    select(-row_id) %>%
    column_to_rownames(var = "season") %>%
    as.data.frame() %>%
    exp() %>%
    t() %>%
    tss() %>%
    to.community(GIFTs_functions,.,GIFT_db) %>%
    as.data.frame() %>%
    rownames_to_column(var="season")
   })

function_pred_seas_red <- map_dfr(community_func_seas_red, function(df) {
  spring_values <- df %>% filter(season == "spring-summer") %>% select(-season)
  winter_values <- df %>% filter(season == "winter") %>% select(-season)
  spring_values - winter_values
}) %>%
  mutate(iteration=c(1:1000)) %>% 
  pivot_longer(!iteration,names_to="trait",values_to="value") %>%
      group_by(trait) %>%
      summarise(mean=mean(value),
        p1 = quantile(value, probs = 0.1),
        p9 = quantile(value, probs = 0.9),
        positive_support = sum(value > 0)/1000,
        negative_support = sum(value < 0)/1000) %>%
      arrange(-positive_support)
# Spring-summer associated
function_pred_seas_red %>%
  filter(mean >0) %>%
  arrange(-positive_support) %>%
  filter(positive_support>=0.9) %>%
  paged_table()
# Winter associated
function_pred_seas_red %>%
  filter(mean <0) %>%
  arrange(-negative_support) %>%
  filter(negative_support>=0.9) %>%
  paged_table()
positive_red <- function_pred_seas_red%>%
  filter(mean >0) %>%
  arrange(mean) %>%
  filter(positive_support>=0.9) %>%
  select(-negative_support) %>%
  rename(support=positive_support)
negative_red <- function_pred_seas_red %>%
  filter(mean <0) %>%
  arrange(mean) %>%
  filter(negative_support>=0.9) %>%
  select(-positive_support) %>%
  rename(support=negative_support)

all_functions_red <- function_pred_seas_red %>%
  left_join(GIFT_db,by=join_by(trait==Code_function)) %>%
  mutate(trait=factor(trait)) %>%
  mutate(function_legend=str_c(trait," - ",Function)) %>%
  select(trait,mean,p1,p9,function_legend) %>% 
  unique()

red_fun_seas_plot <- all_functions_red %>%
  ggplot(aes(x=mean, y=function_legend, xmin=p1, xmax=p9, color=function_legend)) +
      geom_point() +
      geom_errorbar() +
      xlim(c(-3,3)) +
      geom_vline(xintercept=0) +
      scale_color_manual(values = gift_colors$Color) +
      theme_minimal() +
      labs(x="Difference in log-abundance",y="Functional trait", title="S. vulgaris") +
      annotate('text', x=-2, y=12, label = "Enriched in\nspring-summer", color='black') +
      annotate('text', x=2, y=12, label = "Enriched in\nwinter", color='black') +
      #guides(col = guide_legend(ncol = 1))
      theme(legend.position = "none",
            plot.title=element_text( hjust=0.5, vjust=0.6, face='bold', size=12))
10.6.2.1.2 Grey squirrel
community_func_seas_grey <- pred_season_grey  %>%
  filter(genome %in% pred_mags.grey) %>% #keep only predictive mags
  group_by(season, genome) %>%
  mutate(row_id = row_number()) %>%
  pivot_wider(names_from = genome, values_from = value) %>%
  ungroup() %>%
  group_split(row_id) %>%
  as.list() %>%
  lapply(., FUN = function(x){x %>%
    select(-row_id) %>%
    column_to_rownames(var = "season") %>%
    as.data.frame() %>%
    exp() %>%
    t() %>%
    tss() %>%
    to.community(GIFTs_functions,.,GIFT_db) %>%
    as.data.frame() %>%
    rownames_to_column(var="season")
   })

function_pred_seas_grey <- map_dfr(community_func_seas_grey, function(df) {
  spring_values <- df %>% filter(season == "spring-summer") %>% select(-season)
  winter_values <- df %>% filter(season == "winter") %>% select(-season)
  spring_values - winter_values
}) %>%
  mutate(iteration=c(1:1000)) %>% 
  pivot_longer(!iteration,names_to="trait",values_to="value") %>%
      group_by(trait) %>%
      summarise(mean=mean(value),
        p1 = quantile(value, probs = 0.1),
        p9 = quantile(value, probs = 0.9),
        positive_support = sum(value > 0)/1000,
        negative_support = sum(value < 0)/1000) %>%
      arrange(-positive_support)
# Spring-summer associated
function_pred_seas_grey %>%
  filter(mean >0) %>%
  arrange(-positive_support) %>%
  filter(positive_support>=0.9) %>%
  paged_table()
# Winter associated
function_pred_seas_grey %>%
  filter(mean <0) %>%
  arrange(-negative_support) %>%
  filter(negative_support>=0.9) %>%
  paged_table()
positive_grey <- function_pred_seas_grey%>%
  filter(mean >0) %>%
  arrange(mean) %>%
  filter(positive_support>=0.9) %>%
  select(-negative_support) %>%
  rename(support=positive_support)
negative_grey <- function_pred_seas_grey %>%
  filter(mean <0) %>%
  arrange(mean) %>%
  filter(negative_support>=0.9) %>%
  select(-positive_support) %>%
  rename(support=negative_support)

all_functions_grey <- function_pred_seas_grey %>%
  left_join(GIFT_db,by=join_by(trait==Code_function)) %>%
  mutate(trait=factor(trait)) %>%
  mutate(function_legend=str_c(trait," - ",Function)) %>%
  select(trait,mean,p1,p9,function_legend) %>% 
  unique()

grey_fun_seas_plot <- all_functions_grey %>%
  ggplot(aes(x=mean, y=function_legend, xmin=p1, xmax=p9, color=function_legend)) +
      geom_point() +
      geom_errorbar() +
      xlim(c(-3,3)) +
      geom_vline(xintercept=0) +
      scale_color_manual(values = gift_colors$Color) +
      theme_minimal() +
      labs(x="Difference in log-abundance",y="Functional trait", title="S. carolinensis") +
      annotate('text', x=-2, y=12, label = "Enriched in\nspring-summer", color='black') +
      annotate('text', x=2, y=12, label = "Enriched in\nwinter", color='black') +
      #guides(col = guide_legend(ncol = 1))
      theme(legend.position = "none",
            axis.title.y = element_blank(),
            axis.text.y = element_blank(),
            plot.title=element_text( hjust=0.5, vjust=0.6, face='bold', size=12))
10.6.2.1.3 Comparison between species
ggarrange(red_fun_seas_plot, grey_fun_seas_plot, 
          ncol=2, nrow=1)

10.6.2.2 Elements

10.6.2.2.1 Red squirrel
community_elem_seas_red <- pred_season_red  %>%
  filter(genome %in% pred_mags.red) %>% #keep only predictive mags
  group_by(season, genome) %>%
  mutate(row_id = row_number()) %>%
  pivot_wider(names_from = genome, values_from = value) %>%
  ungroup() %>%
  group_split(row_id) %>%
  as.list() %>%
  lapply(., FUN = function(x){x %>%
    select(-row_id) %>%
    column_to_rownames(var = "season") %>%
    as.data.frame() %>%
    exp() %>%
    t() %>%
    tss() %>%
    to.community(GIFTs_elements,.,GIFT_db) %>%
    as.data.frame() %>%
    rownames_to_column(var="season")
   })

element_pred_seas_red <- map_dfr(community_elem_seas_red, function(df) {
  spring_values <- df %>% filter(season == "spring-summer") %>% select(-season)
  winter_values <- df %>% filter(season == "winter") %>% select(-season)
  spring_values - winter_values
}) %>%
  mutate(iteration=c(1:1000)) %>% 
  pivot_longer(!iteration,names_to="trait",values_to="value") %>%
      group_by(trait) %>%
      summarise(mean=mean(value),
        p1 = quantile(value, probs = 0.1),
        p9 = quantile(value, probs = 0.9),
        positive_support = sum(value > 0)/1000,
        negative_support = sum(value < 0)/1000) %>%
      arrange(-positive_support)
# Spring-summer associated
element_pred_seas_red %>%
  filter(mean >0) %>%
  arrange(-positive_support) %>%
  filter(positive_support>=0.9) %>%
  paged_table()
# Winter associated
element_pred_seas_red %>%
  filter(mean <0) %>%
  arrange(-negative_support) %>%
  filter(negative_support>=0.9) %>%
  paged_table()
positive_red <- element_pred_seas_red%>%
  filter(mean >0) %>%
  arrange(mean) %>%
  filter(positive_support>=0.9) %>%
  select(-negative_support) %>%
  rename(support=positive_support)
negative_red <- element_pred_seas_red %>%
  filter(mean <0) %>%
  arrange(mean) %>%
  filter(negative_support>=0.9) %>%
  select(-positive_support) %>%
  rename(support=negative_support)

all_elements_red <- bind_rows(positive_red,negative_red) %>%
  left_join(GIFT_db,by=join_by(trait==Code_element)) %>%
  mutate(trait=factor(trait,levels=c(rev(positive_red$trait),rev(negative_red$trait)))) %>%
  mutate(Code_function=factor(Code_function)) %>%
  mutate(element_legend=str_c(trait," - ",Element)) %>%
  mutate(function_legend=str_c(Code_function," - ",Function)) %>%
  select(trait,mean,p1,p9,element_legend,function_legend) %>% 
  unique()

red_elem_seas_plot <- all_elements_red %>%
  ggplot(aes(x=mean, y=fct_reorder(element_legend, mean), xmin=p1, xmax=p9, color=function_legend)) +
      geom_point() +
      geom_errorbar() +
      xlim(c(-3.5,3.5)) +
      geom_vline(xintercept=0) +
      scale_color_manual(values = gift_colors$Color) +
      theme_minimal() +
      labs(x="Difference in log-abundance",y="Functional trait", title = "S. vulgaris", color="Function") +
      annotate('text', x=-2, y=18, label = "Enriched in\nspring-summer", color='black') +  #NB reversed
      annotate('text', x=2, y=18, label = "Enriched in\nwinter", color='black') +
      #guides(col = guide_legend(ncol = 1))
      theme(legend.position = "right",
            plot.title=element_text( hjust=0.45, vjust=0.6, face='bold', size=12))
10.6.2.2.2 Grey squirrel
community_elem_seas_grey <- pred_season_grey  %>%
  filter(genome %in% pred_mags.grey) %>% #keep only predictive mags
  group_by(season, genome) %>%
  mutate(row_id = row_number()) %>%
  pivot_wider(names_from = genome, values_from = value) %>%
  ungroup() %>%
  group_split(row_id) %>%
  as.list() %>%
  lapply(., FUN = function(x){x %>%
    select(-row_id) %>%
    column_to_rownames(var = "season") %>%
    as.data.frame() %>%
    exp() %>%
    t() %>%
    tss() %>%
    to.community(GIFTs_elements,.,GIFT_db) %>%
    as.data.frame() %>%
    rownames_to_column(var="season")
   })

element_pred_seas_grey <- map_dfr(community_elem_seas_grey, function(df) {
  spring_values <- df %>% filter(season == "spring-summer") %>% select(-season)
  winter_values <- df %>% filter(season == "winter") %>% select(-season)
  spring_values - winter_values
}) %>%
  mutate(iteration=c(1:1000)) %>% 
  pivot_longer(!iteration,names_to="trait",values_to="value") %>%
      group_by(trait) %>%
      summarise(mean=mean(value),
        p1 = quantile(value, probs = 0.1),
        p9 = quantile(value, probs = 0.9),
        positive_support = sum(value > 0)/1000,
        negative_support = sum(value < 0)/1000) %>%
      arrange(-positive_support)
# Spring-summer associated
element_pred_seas_grey %>%
  filter(mean >0) %>%
  arrange(-positive_support) %>%
  filter(positive_support>=0.9) %>%
  paged_table()
# Winter associated
element_pred_seas_grey %>%
  filter(mean <0) %>%
  arrange(-negative_support) %>%
  filter(negative_support>=0.9) %>%
  paged_table()
positive_grey <- element_pred_seas_grey%>%
  filter(mean >0) %>%
  arrange(mean) %>%
  filter(positive_support>=0.9) %>%
  select(-negative_support) %>%
  rename(support=positive_support)
negative_grey <- element_pred_seas_grey %>%
  filter(mean <0) %>%
  arrange(mean) %>%
  filter(negative_support>=0.9) %>%
  select(-positive_support) %>%
  rename(support=negative_support)

all_elements_grey <- bind_rows(positive_grey,negative_grey) %>%
  left_join(GIFT_db,by=join_by(trait==Code_element)) %>%
  mutate(trait=factor(trait,levels=c(rev(positive_grey$trait),rev(negative_grey$trait)))) %>%
  mutate(Code_function=factor(Code_function)) %>%
  mutate(element_legend=str_c(trait," - ",Element)) %>%
  mutate(function_legend=str_c(Code_function," - ",Function)) %>%
  select(trait,mean,p1,p9,element_legend,function_legend) %>% 
  unique()

grey_elem_seas_plot <- all_elements_grey %>%
  ggplot(aes(x=mean, y=fct_reorder(element_legend, mean), xmin=p1, xmax=p9, color=function_legend)) +
      geom_point() +
      geom_errorbar() +
      xlim(c(-3.5,3.5)) +
      geom_vline(xintercept=0) +
      scale_color_manual(values = gift_colors$Color) +
      theme_minimal() +
      labs(x="Difference in log-abundance",y="Functional trait", title ="S. carolinensis", color="Function") +
      annotate('text', x=-2, y=20, label = "Enriched in\nspring-summer", color='black') + #NB reversed
      annotate('text', x=2, y=20, label = "Enriched in\nwinter", color='black') +
      #guides(col = guide_legend(ncol = 1))
      theme(legend.position = "right",
            axis.title.y = element_blank(),
            plot.title=element_text( hjust=0.5, vjust=0.6, face='bold', size=12))
10.6.2.2.2.1 Comparison between species
ggarrange(red_elem_seas_plot, grey_elem_seas_plot, legend="bottom", common.legend=TRUE,
          nrow=1, ncol=2)